Set up data

# Fix the RNG seed so the cross-validation folds drawn by tune()/tune.svm()
# below are reproducible.
set.seed(42)

library(e1071)    # svm(), tune(), tune.svm()
## Warning: package 'e1071' was built under R version 3.4.4
library(caret)    # confusionMatrix()
## Warning: package 'caret' was built under R version 3.4.4
## Loading required package: lattice
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.4.4
## Warning in as.POSIXlt.POSIXct(Sys.time()): unknown timezone 'zone/tz/2018g.
## 1.0/zoneinfo/Asia/Singapore'
# NOTE(review): redundant -- caret already attached ggplot2 above.
library(ggplot2)
library(performanceEstimation)    # classificationMetrics()
library(kernlab)                  # ksvm(), used for an alternative boundary plot
## Warning: package 'kernlab' was built under R version 3.4.4
## 
## Attaching package: 'kernlab'
## The following object is masked from 'package:ggplot2':
## 
##     alpha
# Read the training data: 15 numeric feature columns followed by a factor
# label column (y must be a factor for svm() to do classification).
train <- read.csv("data/A3_train.csv", colClasses = append(rep(c("numeric"), times = 15), "factor"))

# Only two features (x4, x8) plus the label are used in this analysis.
cols <- c('x4', 'x8', 'y')

# Sequential (not random) holdout split: first 1500 rows for training,
# last 500 for validation. Valid only if row order is not meaningful --
# NOTE(review): confirm the CSV rows are not sorted by class or time.
train_t <- train[1:1500, cols]
train_v <- train[1501:2000, cols]

# Plot to see how data look like
ggplot(train_t, aes(x = x4, y = x8, color = y)) + geom_point(shape = 1) + ggtitle("training")

ggplot(train_v, aes(x = x4, y = x8, color = y)) + geom_point(shape = 1) + ggtitle("validation")

# Compute the area under the ROC curve (AUC) for a probabilistic classifier.
#
# Requires the ROCR package to be attached: prediction() and performance()
# are ROCR functions, and this script never loads ROCR --
# NOTE(review): add library(ROCR) before calling this helper.
#
# Args:
#   model:     fitted model whose predict() supports type = 'prob' returning
#              a data frame of class probabilities (e.g. a caret train object;
#              plain e1071::svm objects need probability = TRUE instead --
#              TODO confirm which model type this was written for).
#   test:      data frame of held-out cases containing `label_col`.
#   prob_col:  name of the predicted-probability column to score
#              (default "DIFFICULTY", the original hard-coded name).
#   label_col: name of the true-label column in `test`
#              (default "TARGET"; this data set's label column is `y`,
#              so pass label_col = "y" when scoring the models above).
#
# Returns: the AUC as produced by ROCR -- a one-element list holding a numeric.
calculate_auc <- function(model, test, prob_col = "DIFFICULTY", label_col = "TARGET") {
  probs <- predict(model, test, type = 'prob')[[prob_col]]
  pred_obj <- prediction(predictions = probs, labels = test[[label_col]])
  perf_auc <- performance(pred_obj, measure = "auc")
  perf_auc@y.values
}

Linear Kernel - Baseline

# Linear-kernel SVM baseline, left commented out (presumably already run and
# superseded by the tuned kernels below -- NOTE(review): uncomment to
# reproduce the baseline confusion matrix for comparison).
# model.linear <- svm(y ~ ., data=train_t, kernel = 'linear')
# pred.linear = predict(model.linear, train_v)
# plot(model.linear, train_v)
# confusionMatrix(pred.linear, train_v$y)

Polynomial Kernel - tuning

# Tuning svm model
# Grid search over degree, coef0 and gamma for a polynomial-kernel SVM
# (cost stays at its default of 1); tune.svm() cross-validates each
# combination on train_t and keeps the best one.
poly.tune = tune.svm(y ~ ., data=train_t, kernel="polynomial",degree=c(4,5), coef0=c(0.5,1), gamma = c(0.5,1))
# best.model is the SVM refit on all of train_t with the winning parameters.
svm_poly <- poly.tune$best.model
svm_poly
## 
## Call:
## best.svm(x = y ~ ., data = train_t, degree = c(4, 5), gamma = c(0.5, 
##     1), coef0 = c(0.5, 1), kernel = "polynomial")
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  polynomial 
##        cost:  1 
##      degree:  5 
##       gamma:  1 
##      coef.0:  0.5 
## 
## Number of Support Vectors:  1033
# Best Parameters:
#    SVM-Type:  C-classification 
#  SVM-Kernel:  polynomial 
#        cost:  1 
#      degree:  5 
#       gamma:  1 
#      coef.0:  0.5 

# Plot the kernel boundary and see how it is segment that 2 classes
plot(svm_poly, train_t)

plot(svm_poly, train_v)

#Predict with best model
# First on the training split itself -- an optimistic (resubstitution)
# estimate, shown for comparison with the validation numbers below.
pred.poly.t <- predict(svm_poly, train_t)
confusionMatrix(pred.poly.t, train_t$y)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 428 276
##          1 162 634
##                                           
##                Accuracy : 0.708           
##                  95% CI : (0.6843, 0.7309)
##     No Information Rate : 0.6067          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.4083          
##  Mcnemar's Test P-Value : 6.688e-08       
##                                           
##             Sensitivity : 0.7254          
##             Specificity : 0.6967          
##          Pos Pred Value : 0.6080          
##          Neg Pred Value : 0.7965          
##              Prevalence : 0.3933          
##          Detection Rate : 0.2853          
##    Detection Prevalence : 0.4693          
##       Balanced Accuracy : 0.7111          
##                                           
##        'Positive' Class : 0               
## 
# Then on the held-out validation split (the honest estimate).
pred.poly.v <- predict(svm_poly, train_v)
confusionMatrix(pred.poly.v, train_v$y)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 145  95
##          1  59 201
##                                           
##                Accuracy : 0.692           
##                  95% CI : (0.6495, 0.7322)
##     No Information Rate : 0.592           
##     P-Value [Acc > NIR] : 2.37e-06        
##                                           
##                   Kappa : 0.3794          
##  Mcnemar's Test P-Value : 0.004797        
##                                           
##             Sensitivity : 0.7108          
##             Specificity : 0.6791          
##          Pos Pred Value : 0.6042          
##          Neg Pred Value : 0.7731          
##              Prevalence : 0.4080          
##          Detection Rate : 0.2900          
##    Detection Prevalence : 0.4800          
##       Balanced Accuracy : 0.6949          
##                                           
##        'Positive' Class : 0               
## 
# NOTE(review): performanceEstimation::classificationMetrics() expects
# (trues, preds); here the predictions are passed first, so the
# class-conditional metrics (prec/rec, fpr/fnr, npv/ppv, ...) may be
# transposed relative to the confusionMatrix above -- confirm against the
# package documentation. Symmetric metrics (acc, err) are unaffected.
classificationMetrics(pred.poly.v, train_v$y)
##         fpr         fnr         tpr         tnr         rec        sens 
## 0.226923077 0.395833333 0.604166667 0.773076923 0.604166667 0.604166667 
##        spec        prec         rpp        lift           F         ppv 
## 0.773076923 0.710784314 0.408000000 0.002961601 0.653153153 0.710784314 
##         fdr         npv         for         plr         nlr         dor 
## 0.289215686 0.679054054 0.320945946 2.662429379 0.512023217 5.199821588 
##         acc         err      microF      macroF    macroRec   macroPrec 
## 0.692000000 0.308000000 0.692000000 0.688087368 0.688621795 0.694919184

RBF Kernel - tuning

# Tuning svm model
# Grid search over cost and gamma for an RBF (radial) kernel, via the
# generic tune() interface this time (equivalent to tune.svm()).
models.radial <- tune(svm, y ~ ., data = train_t, kernel = 'radial', ranges = list(cost = c(1, 2), gamma = c(1, 5)))
# Keep the model refit on all of train_t with the best (cost, gamma) pair.
svm_radial <- models.radial$best.model
svm_radial
## 
## Call:
## best.tune(method = svm, train.x = y ~ ., data = train_t, ranges = list(cost = c(1, 
##     2), gamma = c(1, 5)), kernel = "radial")
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  1 
##       gamma:  5 
## 
## Number of Support Vectors:  787
# Best Parameters:
#    SVM-Type:  C-classification 
#  SVM-Kernel:  radial 
#        cost:  1 
#       gamma:  5 

# Plot the kernel boundary and see how it is segment that 2 classes
plot(svm_radial, train_t)

plot(svm_radial, train_v)

# Predict with Best model
# Resubstitution estimate on the training split first...
pred.radial.t <- predict(svm_radial, train_t)
confusionMatrix(pred.radial.t, train_t$y)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 415  82
##          1 175 828
##                                           
##                Accuracy : 0.8287          
##                  95% CI : (0.8086, 0.8474)
##     No Information Rate : 0.6067          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.6308          
##  Mcnemar's Test P-Value : 9.535e-09       
##                                           
##             Sensitivity : 0.7034          
##             Specificity : 0.9099          
##          Pos Pred Value : 0.8350          
##          Neg Pred Value : 0.8255          
##              Prevalence : 0.3933          
##          Detection Rate : 0.2767          
##    Detection Prevalence : 0.3313          
##       Balanced Accuracy : 0.8066          
##                                           
##        'Positive' Class : 0               
## 
# ...then the held-out validation estimate. The RBF kernel clearly beats the
# polynomial kernel on this data (0.822 vs 0.692 validation accuracy).
pred.radial.v <- predict(svm_radial, train_v)
confusionMatrix(pred.radial.v, train_v$y)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 140  25
##          1  64 271
##                                           
##                Accuracy : 0.822           
##                  95% CI : (0.7856, 0.8545)
##     No Information Rate : 0.592           
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.6202          
##  Mcnemar's Test P-Value : 5.626e-05       
##                                           
##             Sensitivity : 0.6863          
##             Specificity : 0.9155          
##          Pos Pred Value : 0.8485          
##          Neg Pred Value : 0.8090          
##              Prevalence : 0.4080          
##          Detection Rate : 0.2800          
##    Detection Prevalence : 0.3300          
##       Balanced Accuracy : 0.8009          
##                                           
##        'Positive' Class : 0               
## 
# NOTE(review): performanceEstimation::classificationMetrics() expects
# (trues, preds); arguments here look swapped (predictions passed first) --
# confirm against the package documentation before quoting prec/rec/fpr/fnr.
classificationMetrics(pred.radial.v, train_v$y)
##          fpr          fnr          tpr          tnr          rec 
##  0.191044776  0.151515152  0.848484848  0.808955224  0.848484848 
##         sens         spec         prec          rpp         lift 
##  0.848484848  0.808955224  0.686274510  0.408000000  0.004159239 
##            F          ppv          fdr          npv          for 
##  0.758807588  0.686274510  0.313725490  0.915540541  0.084459459 
##          plr          nlr          dor          acc          err 
##  4.441287879  0.187297328 23.712500000  0.822000000  0.178000000 
##       microF       macroF     macroRec    macroPrec 
##  0.822000000  0.808880815  0.828720036  0.800907525
# plotting using kernlab
# Train a radial SVM model with cost of misclassification as 1 and gamma as 5
# NOTE(review): kernlab's rbfdot parameter is named sigma; it is assumed here
# to play the same role as e1071's gamma (exp(-sigma * ||u - v||^2)) --
# confirm the parameterizations match before comparing boundaries.
model.radial_ksvm <- ksvm(y ~ ., data = train_t, kernel = 'rbfdot', C = 1, kpar = list(sigma = 5))
# 
# Plot the kernel boundary and see how it is segment that 2 classes
plot(model.radial_ksvm, data = train_t)

Sigmoid Kernel - tuning

# Tuning svm model
# Grid search over cost, gamma and coef0 for a sigmoid-kernel SVM.
models.sigmoid <- tune.svm(y ~ ., data=train_t, kernel="sigmoid", cost = c(0.1, 1), gamma=c(0.5,1), coef0=c(2,3,4))
# Keep the model refit on all of train_t with the winning parameters.
svm_sigmoid <- models.sigmoid$best.model
svm_sigmoid
## 
## Call:
## best.svm(x = y ~ ., data = train_t, gamma = c(0.5, 1), coef0 = c(2, 
##     3, 4), cost = c(0.1, 1), kernel = "sigmoid")
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  sigmoid 
##        cost:  0.1 
##       gamma:  0.5 
##      coef.0:  4 
## 
## Number of Support Vectors:  1176
# Best Parameters:
#    SVM-Type:  C-classification 
#  SVM-Kernel:  sigmoid 
#        cost:  0.1 
#       gamma:  0.5 
#      coef.0:  4 

# Plot the kernel boundary and see how it is segment that 2 classes
plot(svm_sigmoid, train_t)

plot(svm_sigmoid, train_v)

#Predict with Best model
# Resubstitution estimate: the tuned sigmoid model is degenerate -- it
# predicts class 1 almost everywhere (accuracy ~ no-information rate).
# NOTE(review): cost = 0.1 won at the grid's lower boundary; a wider
# cost/gamma grid may be worth trying before discarding this kernel.
pred.sigmoid.t <- predict(svm_sigmoid, train_t)
confusionMatrix(pred.sigmoid.t, train_t$y)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0   8  24
##          1 582 886
##                                          
##                Accuracy : 0.596          
##                  95% CI : (0.5707, 0.621)
##     No Information Rate : 0.6067         
##     P-Value [Acc > NIR] : 0.8086         
##                                          
##                   Kappa : -0.0154        
##  Mcnemar's Test P-Value : <2e-16         
##                                          
##             Sensitivity : 0.013559       
##             Specificity : 0.973626       
##          Pos Pred Value : 0.250000       
##          Neg Pred Value : 0.603542       
##              Prevalence : 0.393333       
##          Detection Rate : 0.005333       
##    Detection Prevalence : 0.021333       
##       Balanced Accuracy : 0.493593       
##                                          
##        'Positive' Class : 0              
## 
# Validation estimate: the model never predicts class 0 correctly
# (sensitivity 0), i.e. it is worse than the majority-class baseline.
pred.sigmoid.v <- predict(svm_sigmoid, train_v)
confusionMatrix(pred.sigmoid.v, train_v$y)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0   0  17
##          1 204 279
##                                           
##                Accuracy : 0.558           
##                  95% CI : (0.5132, 0.6021)
##     No Information Rate : 0.592           
##     P-Value [Acc > NIR] : 0.9439          
##                                           
##                   Kappa : -0.067          
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.0000          
##             Specificity : 0.9426          
##          Pos Pred Value : 0.0000          
##          Neg Pred Value : 0.5776          
##              Prevalence : 0.4080          
##          Detection Rate : 0.0000          
##    Detection Prevalence : 0.0340          
##       Balanced Accuracy : 0.4713          
##                                           
##        'Positive' Class : 0               
## 
# NOTE(review): as above, classificationMetrics() expects (trues, preds);
# the swapped arguments plus the degenerate predictions explain the NaN F
# and macroF values (division by zero in precision/recall) -- confirm.
classificationMetrics(pred.sigmoid.v, train_v$y)
##        fpr        fnr        tpr        tnr        rec       sens 
## 0.42236025 1.00000000 0.00000000 0.57763975 0.00000000 0.00000000 
##       spec       prec        rpp       lift          F        ppv 
## 0.57763975 0.00000000 0.40800000 0.00000000        NaN 0.00000000 
##        fdr        npv        for        plr        nlr        dor 
## 1.00000000 0.94256757 0.05743243 0.00000000 1.73118280 0.00000000 
##        acc        err     microF     macroF   macroRec  macroPrec 
## 0.55800000 0.44200000 0.55800000        NaN 0.28881988 0.47128378